# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

"""
Generate a Sphinx inventory for a Doxygen site.

Requires the XML and HTML output.
"""

from __future__ import annotations

import argparse
import re
import urllib.parse
import xml.etree.ElementTree as ET
from pathlib import Path

import sphinx.util.inventory
from fake_inventory import (
    FakeBuildEnvironment,
    FakeBuilder,
    FakeDomain,
    FakeDomainsContainer,
    FakeEnv,
    FakeObject,
)


def parse_member_id(member_id: str) -> str:
    """Extract the anchor portion of a Doxygen member ID.

    The anchor is whatever follows the last underscore, minus its first
    character.  For example,
    ``group__adbc-statement_1gab81e98bf3871f9b7ce1445fad39eddbd`` becomes
    ``gab81e98bf3871f9b7ce1445fad39eddbd``.
    """
    _, _, tail = member_id.rpartition("_")
    anchor = tail[1:]
    # Sanity check: anchors seen so far are always 33 or 34 characters long.
    assert len(anchor) in (33, 34)
    return anchor


def _member_display_name(memberdef, prefer_qualified):
    """Return ``<qualifiedname>`` text if wanted and present, else ``<name>``."""
    if prefer_qualified:
        qualified = memberdef.find("qualifiedname")
        if qualified is not None:
            return qualified.text
    return memberdef.find("name").text


def scrape_links(item_id_to_url, root):
    """Parse one Doxygen XML document and yield inventory entries.

    Args:
        item_id_to_url: Mapping from an anchor/item ID to the URL of the
            page that contains it.
        root: Root element of a parsed Doxygen XML file.

    Yields:
        ``(domain, name, typ, anchor, url)`` tuples:

        * ``file`` compounds yield ``("std", name, "doc", "", "<id>.html")``.
        * ``class``/``struct`` compounds yield an entry in the ``cpp`` domain
          when the kind is ``class`` or the name contains ``::``, otherwise
          in the ``c`` domain.
        * Each ``<memberdef>`` below a class/file/group/struct compound
          yields a macro, function, type, member or enum entry.  ``friend``
          members are skipped, and members whose anchor is missing from
          ``item_id_to_url`` are reported on stdout and skipped.

    Raises:
        NotImplementedError: For a compound or member kind with no handling.
        KeyError: If a class/struct compound ID is not in ``item_id_to_url``.
    """
    for compounddef in root.findall("compounddef"):
        kind = compounddef.attrib.get("kind")
        if kind in ("dir", "namespace", "page"):
            # dir: generated for a directory.
            # namespace: seems to be emitted for things referencing std::.
            # page: README.md etc.
            continue
        if kind not in ("class", "file", "group", "struct"):
            raise NotImplementedError(f'<compounddef kind="{kind}"> not supported')

        outer_domain = "c"
        if kind == "file":
            name = compounddef.find("compoundname").text
            file_id = compounddef.attrib["id"]
            yield ("std", name, "doc", "", f"{file_id}.html")
        elif kind in {"class", "struct"}:
            name = compounddef.find("compoundname").text
            anchor = compounddef.attrib["id"]
            url = item_id_to_url[anchor]
            if kind == "class" or "::" in name:
                outer_domain = "cpp"
            yield (outer_domain, name, kind, anchor, url)

        for memberdef in compounddef.findall(".//memberdef"):
            member_kind = memberdef.attrib.get("kind")
            if member_kind == "friend":
                continue

            if member_kind == "define":
                domain, typ, prefer_qualified = "c", "macro", False
            elif member_kind == "function":
                domain, typ, prefer_qualified = outer_domain, "function", True
            elif member_kind == "typedef":
                domain, typ, prefer_qualified = "c", "type", False
            elif member_kind == "variable":
                # <qualifiedname> is optional in the XML, so fall back to
                # <name> exactly like the function case does.
                domain, typ, prefer_qualified = outer_domain, "member", True
            elif member_kind == "enum":
                domain, typ, prefer_qualified = "c", "enum", False
            else:
                raise NotImplementedError(
                    f'<memberdef kind="{member_kind}"> not supported'
                )
            name = _member_display_name(memberdef, prefer_qualified)

            anchor = parse_member_id(memberdef.attrib["id"])
            if anchor not in item_id_to_url:
                # TODO: I think this is due to bad Doxygen config
                print(
                    f"WARNING: Could not find URL for "
                    f":{domain}:{typ}:`{name}` ({anchor})"
                )
                continue
            yield (domain, name, typ, anchor, item_id_to_url[anchor])


def make_fake_domains(
    html_root: Path, xml_root: Path, base_url: str
) -> dict[str, FakeDomain]:
    """Build the ``c``, ``cpp`` and ``std`` domains from Doxygen output.

    Args:
        html_root: Directory containing the Doxygen-generated HTML.
        xml_root: Directory containing the Doxygen-generated XML.
        base_url: Base URL the HTML will be served from; a trailing ``/`` is
            added if missing.

    Returns:
        Mapping from domain name to a ``FakeDomain`` whose ``objects`` list
        holds one ``FakeObject`` per entry produced by ``scrape_links``.
    """
    if not base_url.endswith("/"):
        base_url += "/"

    # Scrape the HTML pages generated by Doxygen for anchors.  This gives us a
    # mapping from item hex codes to URLs.
    item_id_to_url = {}
    html_name = re.compile(r'name="([^\"]+)"')
    for page in html_root.rglob("*.html"):
        # The relative path becomes part of a URL, so always use forward
        # slashes regardless of the host platform.
        page_url = page.relative_to(html_root).as_posix()
        item_id_to_url[page.stem] = page_url
        # Read as UTF-8 explicitly instead of relying on the locale default.
        with page.open(encoding="utf-8") as source:
            for anchor_name in html_name.findall(source.read()):
                item_id_to_url[anchor_name] = page_url

    domains = {
        "c": FakeDomain("c", objects=[]),
        "cpp": FakeDomain("cpp", objects=[]),
        "std": FakeDomain("std", objects=[]),
    }

    # Scrape the XML index generated by Doxygen.  We can use the previous
    # index to generate the proper URL.
    for xml_file in xml_root.rglob("*.xml"):
        root = ET.parse(xml_file).getroot()

        for domain, name, typ, anchor, url in scrape_links(item_id_to_url, root):
            url = urllib.parse.urljoin(base_url, url)
            domains[domain].objects.append(
                FakeObject(
                    name=name,
                    dispname=name,
                    typ=typ,
                    docname=url,
                    anchor=anchor,
                    prio=1,
                )
            )

            print(f"Found :{domain}:{typ}:`{name}`")

    return domains


def main():
    """Parse the command line and write ``objects.inv`` to the output directory.

    Positional arguments are the project name, project version, base URL of
    the published Doxygen docs, and the output directory.  ``--html-path``
    and ``--xml-path`` are both mandatory; omitting either one produces an
    argparse usage error rather than a crash later on.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("project", help="Project name")
    parser.add_argument("version", help="Project version")
    parser.add_argument(
        "--html-path",
        type=Path,
        required=True,
        help="Path to the Doxygen generated HTML",
    )
    parser.add_argument(
        "--xml-path",
        type=Path,
        required=True,
        help="Path to the Doxygen generated XML",
    )
    parser.add_argument("url", help="Eventual base URL of the Doxygen docs")
    parser.add_argument(
        "output_dir", type=Path, help="Where to write the new objects.inv"
    )

    args = parser.parse_args()

    domains = make_fake_domains(args.html_path, args.xml_path, args.url)
    config = FakeEnv(project=args.project, version=args.version)
    env = FakeBuildEnvironment(
        config=config, domains=FakeDomainsContainer.from_dict(domains)
    )

    output = args.output_dir / "objects.inv"
    sphinx.util.inventory.InventoryFile.dump(
        str(output),
        env,
        FakeBuilder(),
    )
    print("Wrote", output)


# Run the command-line entry point only when executed as a script, so that
# importing this module has no side effects.
if __name__ == "__main__":
    main()
